1 IMPORT

1.1 Import librairies

library(ggplot2)
library(RColorBrewer)
library(cowplot)
library(tidyverse)
library(dplyr)
library(Seurat)
library(SCpubr)
library(patchwork)
library(scico)

setwd("~/Projects/HumanThymusProject/")
source("~/Projects/HumanThymusProject/scripts/colors_universal.R")

1.2 Import data

The human and murine thymic seurat objects come from the Thymic Atlas generated by Park et al.. See script download_park_thymic_atlas.Rmd for how we obtained these seurat objects with appropriate metadata.

# import human & murine thymic atlases
seur.human <- readRDS("./data_github/park_dataset/park_seurat_human.rds")
seur.mouse <- readRDS("./data_github/park_dataset/park_seurat_mouse.rds")

2 HUMAN

2.1 Adapt cell annotation

# remove parathyroid cells
seur.human <- subset(seur.human, Anno_level_3 != "Epi_GCM2")

# Create a new level of annotation
seur.human@meta.data$Anno_curated <- seur.human@meta.data$Anno_level_1
seur.human@meta.data$Anno_curated <- case_when(
  seur.human@meta.data$Anno_curated=="TEC" & seur.human@meta.data$Anno_level_3=="cTEC" ~ "cTEC",
  # seur.human@meta.data$Anno_curated=="TEC" & seur.human@meta.data$Anno_level_3%in%c("mTEC", "TEC(myo)", "TEC(neuro)") ~ "mTEC",
  seur.human@meta.data$Anno_curated=="TEC" & seur.human@meta.data$Anno_level_3=="mTEC" ~ "mTEC",
  seur.human@meta.data$Anno_curated=="TEC" & seur.human@meta.data$Anno_level_3!="cTEC" & seur.human@meta.data$Anno_level_3!="mTEC" ~ "TEC_other",
  seur.human@meta.data$Anno_curated=="T"   & seur.human@meta.data$Anno_level_2=="DN" ~ "DN",
  seur.human@meta.data$Anno_curated=="T"   & seur.human@meta.data$Anno_level_2=="DP" ~ "DP",
  seur.human@meta.data$Anno_curated=="T"   & seur.human@meta.data$Anno_level_2=="SP" ~ "SP",
  seur.human@meta.data$Anno_curated=="Innate_T"   & seur.human@meta.data$Anno_level_3=="NKT" ~ "SP",
  seur.human@meta.data$Anno_curated=="Innate_T"   & seur.human@meta.data$Anno_level_3=="γδT" ~ "γδT",
  seur.human@meta.data$Anno_curated=="Endo"  ~ "Endothelial",
  seur.human@meta.data$Anno_curated=="Ery"   ~ "Erythrocyte",
  seur.human@meta.data$Anno_curated=="Mesen" ~ "Mesenchymal",
  seur.human@meta.data$Anno_curated=="Mgk"   ~ "Megakaryocyte",
  .default=seur.human@meta.data$Anno_curated
)

# sanity checks
table(seur.human@meta.data$Anno_curated, useNA="ifany")
## 
##               B            cTEC              DN              DP     Endothelial 
##            5082           10156           42474          108418             115 
##     Erythrocyte             HSC Innate_lymphoid            Mast   Megakaryocyte 
##             644             501            2176             148              36 
##     Mesenchymal            mTEC         Myeloid              SP       TEC_other 
##           21290            6448            4801           50476             480 
##             γδT 
##            2582
# expect:
# B               5,082                          
# Endo              115
# Ery               644
# HSC               501
# Innate_lymphoid 2,176
# γδT             2,582
# Mast              148
# Mesen          21,290
# Mgk                36
# Myeloid         4,801
# DN             42,474
# DP            108,418
# SP             50,476
# cTEC           10,156
# mTEC            6,448
# TEC_other         480
table(seur.human@meta.data[,c("Anno_curated", "Anno_level_1")], useNA="ifany")
##                  Anno_level_1
## Anno_curated           B   Endo    Ery    HSC Innate_T Innate_lymphoid   Mast
##   B                 5082      0      0      0        0               0      0
##   cTEC                 0      0      0      0        0               0      0
##   DN                   0      0      0      0        0               0      0
##   DP                   0      0      0      0        0               0      0
##   Endothelial          0    115      0      0        0               0      0
##   Erythrocyte          0      0    644      0        0               0      0
##   HSC                  0      0      0    501        0               0      0
##   Innate_lymphoid      0      0      0      0        0            2176      0
##   Mast                 0      0      0      0        0               0    148
##   Megakaryocyte        0      0      0      0        0               0      0
##   Mesenchymal          0      0      0      0        0               0      0
##   mTEC                 0      0      0      0        0               0      0
##   Myeloid              0      0      0      0        0               0      0
##   SP                   0      0      0      0      349               0      0
##   TEC_other            0      0      0      0        0               0      0
##   γδT                  0      0      0      0     2582               0      0
##                  Anno_level_1
## Anno_curated       Mesen    Mgk Myeloid      T    TEC
##   B                    0      0       0      0      0
##   cTEC                 0      0       0      0  10156
##   DN                   0      0       0  42474      0
##   DP                   0      0       0 108418      0
##   Endothelial          0      0       0      0      0
##   Erythrocyte          0      0       0      0      0
##   HSC                  0      0       0      0      0
##   Innate_lymphoid      0      0       0      0      0
##   Mast                 0      0       0      0      0
##   Megakaryocyte        0     36       0      0      0
##   Mesenchymal      21290      0       0      0      0
##   mTEC                 0      0       0      0   6448
##   Myeloid              0      0    4801      0      0
##   SP                   0      0       0  50127      0
##   TEC_other            0      0       0      0    480
##   γδT                  0      0       0      0      0
# define order to plot clusters
seur.human@meta.data$Anno_curated <- factor(
  seur.human@meta.data$Anno_curated,
  levels = c(
    "DN",
    "DP",
    "SP",
    "γδT",
    "cTEC",
    "mTEC",
    "TEC_other",
    "Innate_lymphoid",
    "B",
    "Myeloid",
    "Mesenchymal",
    "HSC",
    "Mast",
    "Erythrocyte",
    "Megakaryocyte",
    "Endothelial"
  )
)

Determine which clusters are most abundant.

# see how many cells there are per cluster
as.data.frame(table(seur.human$Anno_curated)) %>%
  mutate(totalcells=sum(Freq),
         percentcells=Freq*100/totalcells) %>%
  arrange(percentcells)
##               Var1   Freq totalcells percentcells
## 1    Megakaryocyte     36     255827   0.01407201
## 2      Endothelial    115     255827   0.04495225
## 3             Mast    148     255827   0.05785160
## 4        TEC_other    480     255827   0.18762679
## 5              HSC    501     255827   0.19583547
## 6      Erythrocyte    644     255827   0.25173262
## 7  Innate_lymphoid   2176     255827   0.85057480
## 8              γδT   2582     255827   1.00927580
## 9          Myeloid   4801     255827   1.87665884
## 10               B   5082     255827   1.98649869
## 11            mTEC   6448     255827   2.52045328
## 12            cTEC  10156     255827   3.96987026
## 13     Mesenchymal  21290     255827   8.32203012
## 14              DN  42474     255827  16.60262599
## 15              SP  50476     255827  19.73052102
## 16              DP 108418     255827  42.37942047
hu_clusters_abundant <- levels(seur.human@meta.data$Anno_curated)[!levels(seur.human@meta.data$Anno_curated) %in% c("Megakaryocyte", "Endothelial", "Mast", "HSC", "Erythrocyte")]

2.2 UMAP with clusters (Fig 7E)

Plot UMAP with cluster annotation.

celltypes_col <- c(
  "mTEC"            = "#CE3F37",
  "cTEC"            = "#8C6143",
  "TEC_other"= "#666666",
  "DN"              = "#FDCB89",
  "DP"              = "#9ecae1",
  "SP"              = "#7FC97F",
  "γδT"             = "#B6B1C9",
  "Mesenchymal"     = "#FEE791",
  "Myeloid"         = "#F2F59A",
  "B"               = "#2171b5",
  "Endothelial"     = "#7D449D",
  "Erythrocyte"     = "#CD1588",
  "HSC"             = "#9BB5A4",
  "Innate_lymphoid" = "#EDBB99",
  "Mast"            = "#D1B3BB",
  "Megakaryocyte"   = "#9ABDA4"
  )

# UMAP
Idents(seur.human) <- "Anno_curated"
fig7E_p1 <- do_DimPlot(
    seur.human,
    reduction = "umap",
    idents.keep = hu_clusters_abundant,
    na.value = "grey90",
    legend.position = "right",
    legend.ncol = 1,
    font.size = 30,
    legend.icon.size = 10
  ) +
    scale_color_manual(values = celltypes_col)

# if needed to rasterise to export plot
# fig7E_p1 <- ggrastr::rasterise(fig7E_p1, layers = "Point", dpi = 300)

Plot CD1D expression on FeaturePlot.

# Plot CD1d expression
fig7E_p2 <- do_FeaturePlot(
    seur.human,
    features = "CD1D",
    order = T,
    plot.title = "CD1D expression",
    border.color = "black",
    border.size = 2,
    reduction = "umap",
    pt.size = 1.2,
    legend.position = "right",
    font.size = 30
  ) &
    scale_colour_scico(
      palette = "lapaz",
      alpha = 0.8,
      begin = 0.1,
      end = 0.85,
      direction = -1
    )

# if needed to rasterise to export plot
# fig7E_p2 <- ggrastr::rasterise(fig7E_p2, layers = "Point", dpi = 300)

Combine plots (Fig 7E).

fig7E_p1 | fig7E_p2

2.3 Dotplot (Fig7F)

To note, in the manuscript, several clusters were removed (HSC, Mast, Erythrocyte, Megakaryocyte, Endothelial) because very few cells were captured, and they had very little level of CD1D, SLAMF1 or SLAMF6 detected.

DotPlot(
  seur.human,
  features = rev(c("CD1D", "SLAMF1", "SLAMF6")),
  group.by = "Anno_curated",
  cols = c("lightgrey", "darkred"),
  col.min = 0,
  dot.scale = 10
) +
  coord_flip() +
  theme_cowplot() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(x = "", y = "")

3 MOUSE

3.1 Adapt cell annotation

# remove GEMM
seur.mouse <- subset(seur.mouse, age != "Rag1KO") # 34,073 cells

# adapt cell annotation
seur.mouse@meta.data$Anno_curated <- case_when(
  seur.mouse@meta.data$cell.types=="B"    ~ "B",
  seur.mouse@meta.data$cell.types %in% c("CD4+T", "CD8+T", "Treg", "αβT(entry)")  ~ "SP",
  seur.mouse@meta.data$cell.types %in% c("DN(P)", "DN(Q)")                        ~ "DN",
  seur.mouse@meta.data$cell.types %in% c("DP(P)", "DP(Q)")                        ~ "DP",
  seur.mouse@meta.data$cell.types=="Endo" ~ "Endothelial",
  seur.mouse@meta.data$cell.types=="Ery"  ~ "Erythrocyte",
  seur.mouse@meta.data$cell.types %in% c("Fb", "VSMC")                            ~ "Mesenchymal",
  seur.mouse@meta.data$cell.types %in% c("HSC", "NMP")                            ~ "HSC",
  seur.mouse@meta.data$cell.types %in% c("IELpA", "IELpB/NKT", "NK")              ~ "Innate_lymphoid",
  seur.mouse@meta.data$cell.types %in% c("DC1", "DC2", "aDC", "pDC")              ~ "Myeloid",
  seur.mouse@meta.data$cell.types %in% c("Mac", "Mono")                           ~ "Myeloid",
  seur.mouse@meta.data$cell.types %in% c("Epi_unknown", "TEC_early")              ~ "TEC_other",
  seur.mouse@meta.data$cell.types == "cTEC"  ~ "cTEC",
  seur.mouse@meta.data$cell.types == "mTEC"  ~ "mTEC",
  seur.mouse@meta.data$cell.types == "γδT"   ~ "γδT"
)

# sanity checks
table(seur.mouse@meta.data$Anno_curated, useNA="ifany")
## 
##               B            cTEC              DN              DP     Endothelial 
##             207            2917            6288           16104              55 
##     Erythrocyte             HSC Innate_lymphoid     Mesenchymal            mTEC 
##             123              89             878            1024             215 
##         Myeloid              SP       TEC_other             γδT 
##             379            5020             340             434
table(seur.mouse@meta.data[,c("Anno_curated", "cell.types")], useNA="ifany")
##                  cell.types
## Anno_curated         B CD4+T CD8+T  DC1  DC2 DN(P) DN(Q) DP(P) DP(Q) Endo
##   B                207     0     0    0    0     0     0     0     0    0
##   cTEC               0     0     0    0    0     0     0     0     0    0
##   DN                 0     0     0    0    0  4178  2110     0     0    0
##   DP                 0     0     0    0    0     0     0  7437  8667    0
##   Endothelial        0     0     0    0    0     0     0     0     0   55
##   Erythrocyte        0     0     0    0    0     0     0     0     0    0
##   HSC                0     0     0    0    0     0     0     0     0    0
##   Innate_lymphoid    0     0     0    0    0     0     0     0     0    0
##   Mesenchymal        0     0     0    0    0     0     0     0     0    0
##   mTEC               0     0     0    0    0     0     0     0     0    0
##   Myeloid            0     0     0   22  151     0     0     0     0    0
##   SP                 0  1483   815    0    0     0     0     0     0    0
##   TEC_other          0     0     0    0    0     0     0     0     0    0
##   γδT                0     0     0    0    0     0     0     0     0    0
##                  cell.types
## Anno_curated      Epi_unknown  Ery   Fb  HSC IELpA IELpB/NKT  Mac Mono   NK
##   B                         0    0    0    0     0         0    0    0    0
##   cTEC                      0    0    0    0     0         0    0    0    0
##   DN                        0    0    0    0     0         0    0    0    0
##   DP                        0    0    0    0     0         0    0    0    0
##   Endothelial               0    0    0    0     0         0    0    0    0
##   Erythrocyte               0  123    0    0     0         0    0    0    0
##   HSC                       0    0    0   88     0         0    0    0    0
##   Innate_lymphoid           0    0    0    0   285       185    0    0  408
##   Mesenchymal               0    0  952    0     0         0    0    0    0
##   mTEC                      0    0    0    0     0         0    0    0    0
##   Myeloid                   0    0    0    0     0         0   90   74    0
##   SP                        0    0    0    0     0         0    0    0    0
##   TEC_other                76    0    0    0     0         0    0    0    0
##   γδT                       0    0    0    0     0         0    0    0    0
##                  cell.types
## Anno_curated       NMP TEC_early Treg VSMC  aDC cTEC mTEC  pDC αβT(entry)  γδT
##   B                  0         0    0    0    0    0    0    0          0    0
##   cTEC               0         0    0    0    0 2917    0    0          0    0
##   DN                 0         0    0    0    0    0    0    0          0    0
##   DP                 0         0    0    0    0    0    0    0          0    0
##   Endothelial        0         0    0    0    0    0    0    0          0    0
##   Erythrocyte        0         0    0    0    0    0    0    0          0    0
##   HSC                1         0    0    0    0    0    0    0          0    0
##   Innate_lymphoid    0         0    0    0    0    0    0    0          0    0
##   Mesenchymal        0         0    0   72    0    0    0    0          0    0
##   mTEC               0         0    0    0    0    0  215    0          0    0
##   Myeloid            0         0    0    0   11    0    0   31          0    0
##   SP                 0         0   49    0    0    0    0    0       2673    0
##   TEC_other          0       264    0    0    0    0    0    0          0    0
##   γδT                0         0    0    0    0    0    0    0          0  434
# define order to plot
seur.mouse@meta.data$Anno_curated <- factor(
  seur.mouse@meta.data$Anno_curated,
  levels = c(
    "DN",
    "DP",
    "SP",
    "γδT",
    "cTEC",
    "mTEC",
    "TEC_other",
    "Innate_lymphoid",
    "B",
    "Myeloid",
    "Mesenchymal",
    "HSC",
    "Erythrocyte",
    "Endothelial"
  )
)

Determine which clusters are most abundant.

# see how many cells there are per cluster
as.data.frame(table(seur.mouse$Anno_curated)) %>%
  mutate(totalcells=sum(Freq),
         percentcells=Freq*100/totalcells) %>%
  arrange(percentcells)
##               Var1  Freq totalcells percentcells
## 1      Endothelial    55      34073    0.1614181
## 2              HSC    89      34073    0.2612039
## 3      Erythrocyte   123      34073    0.3609896
## 4                B   207      34073    0.6075192
## 5             mTEC   215      34073    0.6309982
## 6        TEC_other   340      34073    0.9978575
## 7          Myeloid   379      34073    1.1123177
## 8              γδT   434      34073    1.2737358
## 9  Innate_lymphoid   878      34073    2.5768204
## 10     Mesenchymal  1024      34073    3.0053121
## 11            cTEC  2917      34073    8.5610307
## 12              SP  5020      34073   14.7330731
## 13              DN  6288      34073   18.4544948
## 14              DP 16104      34073   47.2632289
ms_clusters_abundant <- levels(seur.mouse@meta.data$Anno_curated)[!levels(seur.mouse@meta.data$Anno_curated) %in% c("Endothelial", "HSC", "Erythrocyte")]

3.2 UMAP with clusters (Fig7A)

Plot UMAP with cluster annotation.

fig7A_p1 <- do_DimPlot(
  seur.mouse,
  reduction = "umap",
  group.by = "Anno_curated",
  idents.keep = ms_clusters_abundant,
  na.value = "grey90",
  legend.position = "right",
  legend.ncol = 1,
  font.size = 30,
  legend.icon.size = 10
) +
  scale_color_manual(values = celltypes_col)

# if needed to rasterise to export plot
# fig7A_p1 <- ggrastr::rasterise(fig7A_p1, layers="Point", dpi=300)

Plot Cd1d1 expression on FeaturePlot.

fig7A_p2 <- do_FeaturePlot(
  seur.mouse,
  features = "Cd1d1",
  plot.title = "Cd1d1 expression",
  order = T,
  border.color = "black",
  border.size = 2,
  reduction = "umap",
  pt.size = 1.2,
  legend.position = "right",
  font.size = 30
) &
  scale_colour_scico(
    palette = "lapaz",
    alpha = 0.8,
    begin = 0.1,
    end = 0.85,
    direction = -1
  )

# if needed to rasterise to export plot
# fig7A_p2 <- ggrastr::rasterise(fig7A_p2, layers="Point", dpi=300)

Combine plots (Fig 7A).

fig7A_p1 | fig7A_p2

3.3 Dotplot (Fig7B)

DotPlot(
  seur.mouse,
  features = rev(c("Cd1d1", "Slamf1", "Slamf6")),
  group.by = "Anno_curated",
  cols = c("lightgrey", "darkred"),
  col.min = 0,
  dot.scale = 10
) +
  coord_flip() +
  theme_cowplot() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(x = "", y = "")

4 SESSION INFO

sessionInfo()
## R version 4.1.3 (2022-03-10)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur/Monterey 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] scico_1.5.0.9000   patchwork_1.1.2    SCpubr_2.0.2       SeuratObject_4.1.3
##  [5] Seurat_4.3.0.1     forcats_1.0.0      stringr_1.5.0      dplyr_1.1.2       
##  [9] purrr_1.0.1        readr_2.1.4        tidyr_1.3.0        tibble_3.2.1      
## [13] tidyverse_1.3.2    cowplot_1.1.1      RColorBrewer_1.1-3 ggplot2_3.4.2     
## 
## loaded via a namespace (and not attached):
##   [1] readxl_1.4.2           backports_1.4.1        plyr_1.8.8            
##   [4] igraph_1.5.0           lazyeval_0.2.2         sp_2.0-0              
##   [7] splines_4.1.3          listenv_0.9.0          scattermore_1.2       
##  [10] digest_0.6.32          yulab.utils_0.0.6      htmltools_0.5.5       
##  [13] viridis_0.6.3          fansi_1.0.4            magrittr_2.0.3        
##  [16] tensor_1.5             googlesheets4_1.1.1    cluster_2.1.4         
##  [19] ROCR_1.0-11            tzdb_0.4.0             globals_0.16.2        
##  [22] modelr_0.1.11          matrixStats_1.0.0      timechange_0.2.0      
##  [25] spatstat.sparse_3.0-2  colorspace_2.1-0       rvest_1.0.3           
##  [28] ggrepel_0.9.3          haven_2.5.3            xfun_0.39             
##  [31] crayon_1.5.2           jsonlite_1.8.7         progressr_0.13.0      
##  [34] spatstat.data_3.0-1    survival_3.5-5         zoo_1.8-12            
##  [37] glue_1.6.2             polyclip_1.10-4        gtable_0.3.3          
##  [40] gargle_1.5.1           leiden_0.4.3           future.apply_1.11.0   
##  [43] abind_1.4-5            scales_1.2.1           DBI_1.1.3             
##  [46] spatstat.random_3.1-5  miniUI_0.1.1.1         Rcpp_1.0.10           
##  [49] viridisLite_0.4.2      xtable_1.8-4           gridGraphics_0.5-1    
##  [52] reticulate_1.30        htmlwidgets_1.6.2      httr_1.4.6            
##  [55] ellipsis_0.3.2         ica_1.0-3              farver_2.1.1          
##  [58] pkgconfig_2.0.3        sass_0.4.6             uwot_0.1.16           
##  [61] dbplyr_2.3.2           deldir_1.0-9           utf8_1.2.3            
##  [64] labeling_0.4.2         ggplotify_0.1.1        tidyselect_1.2.0      
##  [67] rlang_1.1.1            reshape2_1.4.4         later_1.3.1           
##  [70] munsell_0.5.0          cellranger_1.1.0       tools_4.1.3           
##  [73] cachem_1.0.8           cli_3.6.3              generics_0.1.3        
##  [76] broom_1.0.5            ggridges_0.5.4         evaluate_0.21         
##  [79] fastmap_1.1.1          yaml_2.3.7             goftest_1.2-3         
##  [82] knitr_1.43             fs_1.6.2               fitdistrplus_1.1-11   
##  [85] RANN_2.6.1             pbapply_1.7-2          future_1.33.0         
##  [88] nlme_3.1-162           mime_0.12              xml2_1.3.4            
##  [91] compiler_4.1.3         rstudioapi_0.14        plotly_4.10.2         
##  [94] png_0.1-8              spatstat.utils_3.0-3   reprex_2.0.2          
##  [97] bslib_0.5.0            stringi_1.7.12         highr_0.10            
## [100] lattice_0.21-8         Matrix_1.5-4.1         vctrs_0.6.3           
## [103] pillar_1.9.0           lifecycle_1.0.3        spatstat.geom_3.2-1   
## [106] lmtest_0.9-40          jquerylib_0.1.4        RcppAnnoy_0.0.21      
## [109] data.table_1.14.8      irlba_2.3.5.1          httpuv_1.6.11         
## [112] R6_2.5.1               promises_1.2.0.1       KernSmooth_2.23-21    
## [115] gridExtra_2.3          parallelly_1.36.0      codetools_0.2-19      
## [118] assertthat_0.2.1       MASS_7.3-60            withr_2.5.0           
## [121] sctransform_0.3.5      parallel_4.1.3         hms_1.1.3             
## [124] grid_4.1.3             rmarkdown_2.23         googledrive_2.1.1     
## [127] Rtsne_0.16             spatstat.explore_3.2-1 shiny_1.7.4           
## [130] lubridate_1.9.2